# SETWD ===========================
rm(list=ls())


# LIBRARIES ===========================

library(tidyverse)
library(foreign)
library(stargazer)
library(lmtest)
library(multiwayvcov)

# IMPORT DATA ===========================

dfgi <- read.csv("data/sve-08_04_24.csv",stringsAsFactors = F)

## FIG A1. Robustness to Alternative Measures of Inequality ----

vars <- c("gini_disp","wiid.gini",
          "wb.gini","wid.gini",
          "pre.wid.gini",
          "inc.top1","inc.top10","inc.bot50",
          "wealth.top1","wealth.top10","wealth.bot50")

beta <- rep(NA,length(vars))
se <- rep(NA,length(vars))
obs <- rep(NA,length(vars))
beta2 <- rep(NA,length(vars))
se2 <- rep(NA,length(vars))
obs2 <- rep(NA,length(vars))


for (i in 1:length(vars)) {
  g <- vars[i]
  temp <- dfgi %>%
    filter(!is.na(!!sym(g)))%>%
    mutate(x=!!sym(g))
  m1 <- glm(erosion.strict ~ x, data = temp, family = binomial)
  crse <- coeftest(m1, cluster.vcov(m1, temp$country.name))
  beta[i] <- crse[2,1]
  se[i] <- crse[2,2]
  obs[i] <- nobs(crse)
  
  m1 <- glm(erosion.strict ~ log(gdppc) + year + x,data=temp, family=binomial)
  crse <- coeftest(m1, cluster.vcov(m1, temp$country.name))
  beta2[i] <- crse[4,1]
  se2[i] <- crse[4,2]
  obs2[i] <- nobs(crse)
}

res <- data.frame(vars,beta,se,obs,beta2,se2,obs2)

res2 <- res%>%
  mutate(var=vars,var.num=row_number())%>%
  mutate(var=recode(var,"gini_disp"="Gini - SWIID",
                    "inc.top1"="Top 1% Income Share - WID",
                    "inc.top10"="Top 10% Income Share - WID",
                    "inc.bot50"="Bottom 50% Income Share - WID",
                    "wealth.top1"="Top 1% Wealth Share - WID",
                    "wealth.top10"="Top 10% Wealth Share - WID",
                    "wealth.bot50"="Bottom 50% Wealth Share - WID",
                    "pre.wid.gini"="Pre-fisc Gini - WID",
                    "wid.gini"="Gini - WID",
                    "wiid.gini"="Gini - WIID",
                    "wb.gini"="Gini - World Bank"))%>%
  mutate(var.lab=paste(var," (n=",obs,")",sep=""))%>%
  mutate(vf=as.factor(var.lab))%>%
  mutate(vf=fct_reorder(vf, -var.num))

ggplot(res2[1:5,],aes(x=beta,y=vf))+
  geom_vline(xintercept=0,color="gray60")+
  geom_errorbarh(aes(xmin=beta-1.96*se,xmax=beta+1.96*se),height=0,
                 position = position_nudge(y = 0.1))+
  geom_point(position = position_nudge(y = 0.1))+
  geom_errorbarh(aes(xmin=beta2-1.96*se2,xmax=beta2+1.96*se2),height=0,
                 color="gray70",position = position_nudge(y = -0.1))+
  geom_point(aes(x=beta2),color="gray70",position = position_nudge(y = -0.1))+
  ylab("")+
  xlab("Estimated Coefficient")+
  theme_bw()

ggsave("figures/fig-a1.png",width=4.25,height=2.75)

## Tables A5-A7 ----

vars <- c("gini_disp","wiid.gini",
          "wb.gini","wid.gini",
          "pre.wid.gini",
          "inc.top1","inc.top10","inc.bot50",
          "wealth.top1","wealth.top10","wealth.bot50")

vars2 <- recode(vars,"gini_disp"="Gini - SWIID",
                "inc.top1"="Top 1% Income Share - WID",
                "inc.top10"="Top 10% Income Share - WID",
                "inc.bot50"="Bottom 50% Income Share - WID",
                "wealth.top1"="Top 1% Wealth Share - WID",
                "wealth.top10"="Top 10% Wealth Share - WID",
                "wealth.bot50"="Bottom 50% Wealth Share - WID",
                "pre.wid.gini"="Pre-fisc Gini - WID",
                "wid.gini"="Gini - WID",
                "wiid.gini"="Gini - WIID",
                "wb.gini"="Gini - World Bank")


### Bivariate ---

fun.ma <- function(g){
  temp <- dfgi%>%filter(!is.na(g))
  formula <- as.formula(paste("erosion.strict ~",g))
  m1 <- glm(formula=formula, data = temp, family = binomial)
  coeftest(m1, cluster.vcov(m1, temp$country.name))
}

length(vars)
m1 <- fun.ma(vars[1])
m2 <- fun.ma(vars[2])
m3 <- fun.ma(vars[3])
m4 <- fun.ma(vars[4])
m5 <- fun.ma(vars[5])
m6 <- fun.ma(vars[6])
m7 <- fun.ma(vars[7])
m8 <- fun.ma(vars[8])
m9 <- fun.ma(vars[9])
m10 <- fun.ma(vars[10])
m11 <- fun.ma(vars[11])



### GDP ----

fun.mb <- function(g){
  temp <- dfgi%>%filter(!is.na(g))
  formula <- as.formula(paste("erosion.strict ~ ",g," + log(gdppc)"))
  m1 <- glm(formula=formula, data = temp, family = binomial)
  coeftest(m1, cluster.vcov(m1, temp$country.name))
}


length(vars)
m1b <- fun.mb(vars[1])
m2b <- fun.mb(vars[2])
m3b <- fun.mb(vars[3])
m4b <- fun.mb(vars[4])
m5b <- fun.mb(vars[5])
m6b <- fun.mb(vars[6])
m7b <- fun.mb(vars[7])
m8b <- fun.mb(vars[8])
m9b <- fun.mb(vars[9])
m10b <- fun.mb(vars[10])
m11b <- fun.mb(vars[11])



### Year + GDP ----

fun.mc <- function(g){
  temp <- dfgi%>%filter(!is.na(g))
  formula <- as.formula(paste("erosion.strict ~ ",g," + year + log(gdppc)"))
  m1 <- glm(formula=formula, data = temp, family = binomial)
  coeftest(m1, cluster.vcov(m1, temp$country.name))
}

m1c <- fun.mc(vars[1])
m2c <- fun.mc(vars[2])
m3c <- fun.mc(vars[3])
m4c <- fun.mc(vars[4])
m5c <- fun.mc(vars[5])
m6c <- fun.mc(vars[6])
m7c <- fun.mc(vars[7])
m8c <- fun.mc(vars[8])
m9c <- fun.mc(vars[9])
m10c <- fun.mc(vars[10])
m11c <- fun.mc(vars[11])



### Print tables ----

#### TAB A5. Alternative Inequality Measures (Bivariate) ----

stargazer(m2,m3,m4,m5,m6, title="Logit",digits=3,
          header=F,df=F,omit.stat=c("f","ser","aic","ll","rsq","adj.rsq"),
          add.lines = list(c("Observations", res2$obs[2:6])),
          star.cutoffs=c(0.05,0.01,0.001),
          star.char=c("*","**","***"),
          notes="$^{*} p<0.05$; $^{**} p<0.01$; $^{***} p<0.001$",
          covariate.labels=c("Inequality"),
          column.labels = vars2[2:6],
          notes.append=F,table.placement="h!",
          out="tables/apptab-ineq1-10_27_24.tex")

stargazer(m7,m8,m9,m10,m11, title="Logit",digits=3,
          header=F,df=F,omit.stat=c("f","ser","aic","ll","rsq","adj.rsq"),
          add.lines = list(c("Observations", res2$obs[7:11])),
          star.cutoffs=c(0.05,0.01,0.001),
          star.char=c("*","**","***"),
          notes="$^{*} p<0.05$; $^{**} p<0.01$; $^{***} p<0.001$",
          covariate.labels=c("Inequality"),
          column.labels = vars2[7:11],
          notes.append=F,table.placement="h!",
          out="tables/apptab-ineq2-10_27_24.tex")

#### TAB A6. Alternative Inequality Measures (with GDP) ----

stargazer(m2b,m3b,m4b,m5b,m6b, title="Logit",digits=3,
          header=F,df=F,omit.stat=c("f","ser","aic","ll","rsq","adj.rsq"),
          add.lines = list(c("Observations", res2$obs2[2:6])),
          star.cutoffs=c(0.05,0.01,0.001),
          star.char=c("*","**","***"),
          notes="$^{*} p<0.05$; $^{**} p<0.01$; $^{***} p<0.001$",
          column.labels = vars2[2:6],
          notes.append=F,table.placement="h!",
          out="tables/apptab-ineq3-10_27_24.tex")

stargazer(m7b,m8b,m9b,m10b,m11b, title="Logit",digits=3,
          header=F,df=F,omit.stat=c("f","ser","aic","ll","rsq","adj.rsq"),
          add.lines = list(c("Observations", res2$obs2[7:11])),
          star.cutoffs=c(0.05,0.01,0.001),
          star.char=c("*","**","***"),
          notes="$^{*} p<0.05$; $^{**} p<0.01$; $^{***} p<0.001$",
          column.labels = vars2[7:11],
          notes.append=F,table.placement="h!",
          out="tables/apptab-ineq4-10_27_24.tex")

#### TAB A7. Alternative Inequality Measures (with GDP and Year) ----

stargazer(m2c,m3c,m4c,m5c,m6c, title="Logit",digits=3,
          header=F,df=F,omit.stat=c("f","ser","aic","ll","rsq","adj.rsq"),
          add.lines = list(c("Observations", res2$obs2[2:6])),
          star.cutoffs=c(0.05,0.01,0.001),
          star.char=c("*","**","***"),
          notes="$^{*} p<0.05$; $^{**} p<0.01$; $^{***} p<0.001$",
          column.labels = vars2[2:6],
          notes.append=F,table.placement="h!",
          out="tables/apptab-ineq5-10_27_24.tex")

stargazer(m7c,m8c,m9c,m10c,m11c, title="Logit",digits=3,
          header=F,df=F,omit.stat=c("f","ser","aic","ll","rsq","adj.rsq"),
          add.lines = list(c("Observations", res2$obs2[7:11])),
          star.cutoffs=c(0.05,0.01,0.001),
          star.char=c("*","**","***"),
          notes="$^{*} p<0.05$; $^{**} p<0.01$; $^{***} p<0.001$",
          column.labels = vars2[7:11],
          notes.append=F,table.placement="h!",
          out="tables/apptab-ineq6-10_27_24.tex")


## TAB A8. Top 1% and Next 9% Shares ----

temp <- dfgi%>%
  filter(!is.na(inc.top1),!is.na(wealth.top1))

temp <- temp%>%
  mutate(inc.9099=inc.top10-inc.top1,
         wealth.9099=wealth.top10-wealth.top1)


m1 <- glm(erosion.strict ~ wealth.top1 +
            wealth.9099,data=temp, family=binomial)
crse <- coeftest(m1, cluster.vcov(m1, temp$country.name))

m2 <- glm(erosion.strict ~ inc.top1 +
            inc.9099,data=temp, family=binomial)
crse2 <- coeftest(m2, cluster.vcov(m2, temp$country.name))

m3 <- glm(erosion.strict ~ wealth.top1 +
            wealth.9099 + log(gdppc) + year,data=temp, family=binomial)
crse3 <- coeftest(m3, cluster.vcov(m3, temp$country.name))

m4 <- glm(erosion.strict ~ inc.top1 +
            inc.9099 + log(gdppc) + year ,data=temp, family=binomial)
crse4 <- coeftest(m4, cluster.vcov(m4, temp$country.name))



stargazer(crse,crse2,crse3,crse4, title="Logit",digits=3,
          header=F,df=F,omit.stat=c("f","ser","aic","ll","rsq","adj.rsq"),
          add.lines = list(c("Observations", nobs(crse),nobs(crse2),
                             nobs(crse3),nobs(crse4))),
          star.cutoffs=c(0.1,0.05,0.01,0.001),
          star.char=c("\\dagger","*","**","***"),
          notes="$^{\\dagger} p<0.1$; $^{*} p<0.05$; $^{**} p<0.01$; $^{***} p<0.001$",
          covariate.labels=c("Wealth Top 1%","Wealth Next 9%",
                             "Income Top 1%","Income Next 9%",
                             "Logged GDP per capita","Year"),
          notes.append=F,table.placement="h!",
          out="tables/apptab-ineq9099-10_27_24.tex")



## SAMPLE INFO ----

# cases lost in wb/wid data:
t1 <- dfgi%>%
  group_by(country.name)%>%
  summarise(missing.wb=mean(is.na(wb.gini)),
            missing.wid=mean(is.na(wid.gini)),
            eroder=max(eroder,na.rm=T))

## AUC ----

auc <- function(temp){
  pos.scores <- temp%>%
    filter(!is.na(pred1))%>%
    filter(erosion.strict==1)%>%
    pull(pred1)
  
  neg.scores <- temp%>%
    filter(!is.na(pred1))%>%
    filter(erosion.strict==0)%>%
    pull(pred1)
  
  # estimate AUC
  set.seed(135102)
  reps <- 1000000
  mean(sample(pos.scores,reps,replace=T) > sample(neg.scores,reps,replace=T))
}

m1 <- glm(erosion.strict ~ inc.bot50 + log(gdppc) + year, 
          data=temp, family="binomial")
temp$pred1 <- predict(m1, temp, type="response")
auc(temp)
# AUC: 0.8048

m1 <- glm(erosion.strict ~ inc.top10 + log(gdppc) + year, 
          data=temp, family="binomial")
temp$pred1 <- predict(m1, temp, type="response")
auc(temp)
# AUC: 0.8075

m1 <- glm(erosion.strict ~ inc.top1 + log(gdppc) + year, 
          data=temp, family="binomial")
temp$pred1 <- predict(m1, temp, type="response")
auc(temp)
# AUC: 0.8090

m1 <- glm(erosion.strict ~ wealth.bot50 + log(gdppc) + year, 
          data=temp, family="binomial")
temp$pred1 <- predict(m1, temp, type="response")
auc(temp)
# AUC: 0.8081

m1 <- glm(erosion.strict ~ wealth.top10 + log(gdppc) + year, 
          data=temp, family="binomial")
temp$pred1 <- predict(m1, temp, type="response")
auc(temp)
# AUC: 0.8146

m1 <- glm(erosion.strict ~ wealth.top1 + log(gdppc) + year, 
          data=temp, family="binomial")
temp$pred1 <- predict(m1, temp, type="response")
auc(temp)
# AUC: 0.81798

